syntax = "proto3";

package tensorflow.quantization;

import "tensorflow/core/framework/tensor.proto";

option cc_enable_arenas = true;

// This file contains the definition of TF GraphDef-level mixed-precision
// quantization configuration. The configuration will be used in the
// quantization path to determine the following factors:
// 1) What will be the quantization method for the model.
// 2) What will be the default quantization precision for the model.
// 3) What will be the quantization precision for each unit (nodes / ops) in the
//    model.

// Model quantization method for optimization.
//
// Various techniques for model quantization are defined within this message
// along with a field that specifies a method to be used for a particular
// quantization request.
// NEXT ID: 4
message QuantizationMethod {
  // Quantization methods that are supported as a stable API.
  // Currently only the zero value is defined in this enum.
  enum Method {
    // This should never be used. Using this will generally result in an error.
    METHOD_UNSPECIFIED = 0;  // go/do-include-enum-unspecified
  }

  // Experimental quantization methods.
  // These methods are either not implemented or provided with an unstable
  // behavior.
  enum ExperimentalMethod {
    // This should never be used. Using this will generally result in an error.
    EXPERIMENTAL_METHOD_UNSPECIFIED = 0;  // go/do-include-enum-unspecified

    // Static range quantization. Quantized tensor values' ranges are statically
    // determined.
    STATIC_RANGE = 1;

    // Dynamic range quantization. Quantized tensor values' ranges are
    // determined in the graph executions. The weights are quantized during
    // conversion.
    DYNAMIC_RANGE = 2;

    // Weight-only quantization. Only weights are quantized during conversion.
    WEIGHT_ONLY = 3;
  }

  // The quantization method is either a stable (non-experimental) method or an
  // experimental one. Setting one member of the oneof clears the other.
  oneof method_oneof {
    // A method from the stable API.
    Method method = 1;
    // A method from the experimental API.
    ExperimentalMethod experimental_method = 2;
  }

  // Tensor type specs for individual quantization components. Each entry pairs
  // a component with the tensor type to use for it, which allows specifying a
  // spec that differs from the selected method's default.
  repeated QuantizationComponentSpec quantization_component_specs = 3;
}

// Component spec for quantization.
//
// Defines tensor type of the component. If the combination is not supported,
// an error will be raised.
// NEXT ID: 3
message QuantizationComponentSpec {
  // The kind of component that a spec applies to.
  // NEXT ID: 4
  enum QuantizationComponent {
    // Zero value; this is what the field reads as when it has not been set.
    COMPONENT_UNSPECIFIED = 0;
    // The spec applies to activations.
    COMPONENT_ACTIVATION = 1;
    // The spec applies to weights.
    COMPONENT_WEIGHT = 2;
    // The spec applies to biases.
    COMPONENT_BIAS = 3;
  }

  // The tensor type to quantize a component to.
  // NEXT ID: 4
  enum TensorType {
    // Zero value; this is what the field reads as when it has not been set.
    TENSORTYPE_UNSPECIFIED = 0;
    // NOTE(review): presumably a 4-bit integer type - confirm.
    TENSORTYPE_INT_4 = 1;
    // NOTE(review): presumably an 8-bit integer type - confirm.
    TENSORTYPE_INT_8 = 2;
    // NOTE(review): presumably a 32-bit integer type - confirm.
    TENSORTYPE_INT_32 = 3;
  }

  // Defines target component.
  QuantizationComponent quantization_component = 1;

  // Defines the target tensor type.
  TensorType tensor_type = 2;
}

// Unit (either nodes or ops at this moment) wise quantization method for
// mixed bit precision quantization. It contains the name of the unit,
// the granularity of the unit, and the quantization method for each unit.
// NEXT ID: 5
message UnitWiseQuantizationSpec {
  // Quantization unit granularity.
  // NEXT ID: 3
  enum UnitType {
    // This should never be used. Using this will generally result in an error.
    UNIT_UNSPECIFIED = 0;
    // The unit is a node.
    UNIT_NODE = 1;
    // The unit is an op.
    UNIT_OP = 2;
  }

  // Available quantization unit. Currently node-wise and op-wise are
  // available quantization units.
  UnitType unit_type = 1;

  // Name of the function that contains the unit. Unit names are not guaranteed
  // to be unique across SavedModels, but they are unique within each function
  // def, so `func_name` can be used together with `unit_name` to identify a
  // unit. If users do not need to guarantee uniqueness, `func_name` can be
  // omitted.
  string func_name = 2;

  // Name of the unit that this spec applies to.
  string unit_name = 3;

  // Quantization option information for the current unit.
  // TODO(b/241322587): Support specifying quantization method for each unit of
  // TF GraphDef.
  repeated QuantizationComponentSpec quantization_component_spec = 4;
}

// List of supported opsets to deploy the quantized model.
// The quantized model contains different set of ops depending on the opset.
// NEXT ID: 4
enum OpSet {
  // Zero value; this is what an `OpSet` field reads as when it has not been
  // set. The comment on `QuantizationOptions.op_set` states that `XLA` is used
  // in that case.
  OP_SET_UNSPECIFIED = 0;  // go/do-include-enum-unspecified
  // Uses TF ops that mimic quantization behavior. Used when the corresponding
  // integer op is not yet present.
  TF = 1;
  // Uses TF XLA ops.
  XLA = 2;
  // Uses TF Uniform Quantized ops.
  UNIFORM_QUANTIZED = 3;
}

// Configurations for variable freezing during quantization passes.
// NEXT ID: 2
message FreezeAllVariables {
  // Setting this to true freezes all variables to constants during
  // quantization. Setting this to `false` is an experimental feature and does
  // not have stability guarantees.
  //
  // Note that a proto3 `bool` has no explicit presence: when this message is
  // set but `enabled` is left untouched, the field reads as `false`.
  bool enabled = 1;
}

// Defines various calibration options.
// NEXT ID: 3
message CalibrationOptions {
  // Configurations for calibration methods.
  // NEXT ID: 5
  enum CalibrationMethod {
    // Zero value; this is what `calibration_method` reads as when it has not
    // been set.
    CALIBRATION_METHOD_UNSPECIFIED = 0;
    // Use the min, max values of all sample datasets.
    CALIBRATION_METHOD_MIN_MAX = 1;
    // Use the average of min, max values in each sample dataset.
    CALIBRATION_METHOD_AVERAGE_MIN_MAX = 2;
    // Use the min/max percentile value of histogram.
    CALIBRATION_METHOD_HISTOGRAM_PERCENTILE = 3;
    // Use the histogram mid values that minimize MSE error.
    // This is a very slow algorithm because it computes all errors for all
    // histogram mid value pairs. Therefore the value of num_bins is recommended
    // to be 256 or less.
    CALIBRATION_METHOD_HISTOGRAM_MSE_BRUTEFORCE = 4;
  }

  // Parameters required for calibration.
  //
  // NOTE(review): the defaults documented below (256, 0.001, 99.999) are not
  // proto3 defaults - an unset field reads as 0. Presumably the consumer
  // substitutes them when a field is left at 0; confirm against the code that
  // reads this message.
  // NEXT ID: 4
  message CalibrationParameters {
    // The number of bins when histogram is initialized. It can be increased
    // because histogram is dynamically expanded by sample inputs.
    // initial_num_bins is 256 by default.
    int32 initial_num_bins = 1;
    // min_percentile is only used in HISTOGRAM_PERCENTILE.
    // min_percentile is 0.001 by default.
    float min_percentile = 2;
    // max_percentile is only used in HISTOGRAM_PERCENTILE.
    // max_percentile is 99.999 by default.
    float max_percentile = 3;
  }

  // Determines how to calibrate.
  // The default calibration method is MIN_MAX.
  CalibrationMethod calibration_method = 1;

  // Defines the parameters required for calibration. Parameters such as the
  // number of bins in the histogram and percentile belong to it.
  // MIN_MAX and AVERAGE_MIN_MAX don't require this parameter and methods
  // starting with HISTOGRAM require this parameter.
  CalibrationParameters calibration_parameters = 2;
}

// The data format of each sample in the representative dataset.
message RepresentativeDataSample {
  // Field number 1 is not used by this definition. It is reserved so that it
  // cannot later be assigned to a new field with a different meaning.
  // NOTE(review): presumably 1 belonged to a field that was removed - confirm
  // against the file history.
  reserved 1;

  // The input tensors of a single sample, stored as `TensorProto` values keyed
  // by string.
  // NOTE(review): keys are presumably the input names of the target
  // signature - confirm against the code that reads this message.
  map<string, TensorProto> tensor_proto_inputs = 2;
}

// Specifies the type and path to the representative dataset.
// NEXT ID: 2
message RepresentativeDatasetFile {
  // Only TfRecord file is supported at the moment but defining this field
  // as oneof so it can be easily extended to support other types.
  oneof dataset_file {
    // Path to a TFRecord file that holds the representative dataset.
    // NOTE(review): records are presumably serialized
    // `RepresentativeDataSample` messages - confirm against the reader.
    string tfrecord_file_path = 1;
  }
}

// Configuration for quantization debugger.
// NEXT ID: 4
message DebuggerOptions {
  // Type of quantization debugger. Depending on the type, inputs and outputs
  // are wired differently.
  // NEXT ID: 3
  enum DebuggerType {
    // Zero value; this is what `debugger_type` reads as when it has not been
    // set.
    DEBUGGER_TYPE_UNSPECIFIED = 0;
    // DEBUGGER_TYPE_WHOLE_MODEL creates two tf.SavedModels - an unquantized
    // and a quantized model - with DumpTensor added to outputs of quantizable
    // layers. The DumpTensor dumps the entire value of its input to a
    // specified file. When DEBUGGER_TYPE_WHOLE_MODEL is used,
    // unquantized_dump_model_path has to be specified.
    DEBUGGER_TYPE_WHOLE_MODEL = 1;
    // DEBUGGER_TYPE_PER_LAYER creates a model with both quantized and original
    // unquantized layers. An unquantized layer's input comes from the previous
    // quantized layer. DumpTensor is used to save the entire value of outputs
    // from both the quantized and the unquantized layer.
    DEBUGGER_TYPE_PER_LAYER = 2;
  }

  // Selects which type of quantization debugger to use.
  DebuggerType debugger_type = 1;

  // Path to save unquantized model with dump tensor ops attached.
  // Used when debugger_type is WHOLE_MODEL.
  string unquantized_dump_model_path = 2;

  // Path to save debugger related logs. Defaults to '/tmp/dumps'.
  string log_dir_path = 3;
}

// Defines various options to specify and control the behavior of the quantizer.
// It consists of
// 1) Model-wise quantization configuration as a default configuration. If it is
// None, the default configuration is "do not quantize the model".
// 2) A set of supported operations.
// 3) Unit wise quantization precision.
// 4) Target hardware name.
// NEXT ID: 17
message QuantizationOptions {
  // The default quantization configuration for the model. If the below
  // unit-wise configuration does not exist, we use this quantization
  // configuration for the entire model. For each method, default configuration
  // is:
  // 1) STATIC_RANGE
  //    - COMPONENT_ACTIVATION: INT_8
  //    - COMPONENT_WEIGHT: INT_8
  //    - COMPONENT_BIAS: INT_32
  // 2) WEIGHT_ONLY
  //    - COMPONENT_WEIGHT: INT_8
  // 3) DYNAMIC_RANGE
  //    - COMPONENT_ACTIVATION: INT_8
  //    - COMPONENT_WEIGHT: INT_8
  //    - COMPONENT_BIAS: INT_32
  // And different spec can be specified with quantization_component_specs.
  // If the below unit-wise configuration exists, this default one will become
  // the quantization configuration for units that are not specified in
  // unit-wise configurations.
  QuantizationMethod quantization_method = 1;

  OpSet op_set = 2;  // If not specified, it defaults to `XLA`.

  // Quantization spec for each unit. Units can become either nodes or ops, and
  // the mixture of those different units are allowed. If there are conflicts or
  // ambiguity in this unit-wise precision, our quantizer will raise an error.
  repeated UnitWiseQuantizationSpec unit_wise_quantization_spec = 3;

  // Field number 4 is not used by this definition. It is reserved so that it
  // cannot later be assigned to a new field with a different meaning.
  // NOTE(review): presumably 4 belonged to a field that was removed - confirm
  // against the file history.
  reserved 4;

  // (TF1 SavedModel only) Collection of tags identifying the MetaGraphDef
  // within the SavedModel to analyze. If not specified, ["serve"] is used.
  repeated string tags = 5;

  // Sequence of keys identifying SignatureDef containing inputs and outputs.
  // If not specified, ["serving_default"] is used.
  repeated string signature_keys = 6;

  // A map from signature keys to the corresponding representative dataset.
  map<string, RepresentativeDatasetFile> representative_datasets = 7;

  // Minimum number of weight elements to apply quantization. Currently only
  // supported for Post-training Dynamic Range Quantization. By default, it is
  // set to 1024. To disable this, set the value to -1 explicitly.
  int64 min_num_elements_for_weights = 8;

  // When set to `true`, freezes all variables in the model into constants.
  // When set to `false` the model's large constants are converted to variables.
  // Setting this to `false` is an experimental feature and quantization may
  // fail. To quantize models larger than 2 GiB, this should be set to `false`.
  // If not set, it defaults to `true`.
  FreezeAllVariables freeze_all_variables = 9;

  // Enables channel-wise quantization. By default, channel-wise quantization is
  // not applied regardless of the op support. Currently, it is supported for
  // Uniform Quantized opset only.
  bool enable_per_channel_quantization = 10;

  // Enables two inputs of an operation to be both tensors.
  // Currently supports MatMul and BatchMatMul ops for XLA.
  // TODO(b/263528090): Check the condition when this feature is beneficial.
  bool enable_two_input_tensors = 11;

  // Supports TPU model quantization. If the target model for the quantization
  // is already converted for TPU, this flag may be helpful. Note that this
  // feature may be unstable as it is under the experimental stage.
  bool experimental_enable_tpu_model_support = 12;

  // Produces legacy weight-only graph where the qconst op (containing quantized
  // values) is followed by a dequantization op.
  bool enable_legacy_weight_only = 13;

  // If set to true, it forces calibration in graph mode instead of eager mode
  // when the context is in eager mode.
  bool force_graph_mode_calibration = 14;

  // Defines calibration options for quantization. This option is only used for
  // activation of static range quantization (SRQ). Quantization calibration
  // method is set to MIN_MAX by default.
  CalibrationOptions calibration_options = 15;

  // Configuration related to quantization debugger.
  DebuggerOptions debugger_options = 16;
}
